/*
 * TOP SECRET Copyright 2006-2015 Transsion.com All right reserved. This software is the confidential and proprietary
 * information of Transsion.com ("Confidential Information"). You shall not disclose such Confidential Information and
 * shall use it only in accordance with the terms of the license agreement you entered into with Transsion.com.
 */
package com.yunji.framework_template.biz.crawler.ke;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Resource;

import org.springframework.stereotype.Service;

import com.yunji.framework_template.biz.crawler.ContentImageHandler;
import com.yunji.framework_template.biz.crawler.NewsCrawler;
import com.yunji.framework_template.biz.crawler.SourceType;
import com.yunji.framework_template.common.enumeration.CountryCode;
import com.yunji.framework_template.common.enumeration.NewsType;

/**
 * Crawler configuration for the nation.co.ke sites ({@code www.nation.co.ke} and {@code ntv.nation.co.ke}). <br/>
 * Supplies the seed section pages, the rule that decides which discovered links are treated as news detail pages,
 * the image handler and the country codes this crawler applies to. <br/>
 * Date: 2018-12-18 15:31:24 <br/>
 *
 * @author fenglibin1982@163.com
 * @Blog http://blog.csdn.net/fenglibing
 */
@Service
public class NationNewsCrawler extends NewsCrawler {

    /** Host name of the main news site. */
    private static final String MAIN_HOST     = "www.nation.co.ke";

    /** Host name of the NTV (video) site. */
    private static final String NTV_HOST      = "ntv.nation.co.ke";

    /** Base URL that main-site section paths are appended to. */
    private static final String MAIN_BASE_URL = "https://www.nation.co.ke/";

    /** Base URL that NTV section paths are appended to. */
    private static final String NTV_BASE_URL  = "https://ntv.nation.co.ke/";

    @Resource
    private NationNewsImageHandler nationNewsImageHandler;

    /**
     * Decides whether {@code url} should be accepted as a news detail page.
     * <p>
     * A detail URL on the main site looks like
     * {@code https://www.nation.co.ke/business/Inmates-sue-to-raise-20-cents-pay-per-day/996-4899948-bc4wsn/index.html}.
     * The decision is made purely on the host and on the number and shape of the path segments:
     * <ul>
     * <li>{@code www.nation.co.ke}: the path split on "/" must yield at least 5 parts; more than 5 parts is accepted
     * outright, exactly 5 parts is accepted only when the second path segment contains a "-" separated title.</li>
     * <li>{@code ntv.nation.co.ke}: the path must start with {@code /news/} and split into exactly 4 parts.</li>
     * </ul>
     * Surrounding whitespace is ignored, and the scheme prefix and host are compared case-insensitively.
     * <p>
     * NOTE(review): section pages whose second segment is hyphenated (for example
     * {@code /counties/eastern-region/3347810-3347810-2501vq/index.html}) also satisfy the main-site rule — confirm
     * whether that is intended.
     *
     * @param url the candidate link; may be {@code null}
     * @return {@code true} if the link matches one of the rules above; {@code false} for {@code null}, blank,
     *         {@code mailto}/{@code javascript} links, unparsable URLs and every other host
     */
    @Override
    public boolean isOkUrl(String url) {
        if (url == null) {
            return false;
        }
        // Links scraped from markup are often padded with whitespace, which java.net.URI rejects.
        String candidate = url.trim();
        if (candidate.isEmpty()) {
            return false;
        }
        if (startsWithIgnoreCase(candidate, "mailto") || startsWithIgnoreCase(candidate, "javascript")) {
            return false;
        }

        URI uri;
        try {
            uri = new URI(candidate);
        } catch (URISyntaxException ignored) {
            // A link that cannot be parsed cannot be a detail page; rejecting it is the intended outcome.
            return false;
        }

        String host = uri.getHost();
        if (host == null) {
            return false;
        }
        String path = uri.getPath();
        String[] pathArr = path.split("/");

        // Host names are case-insensitive, so compare them that way.
        if (MAIN_HOST.equalsIgnoreCase(host)) {
            return isMainSiteDetailPath(pathArr);
        }
        if (NTV_HOST.equalsIgnoreCase(host)) {
            return path.startsWith("/news/") && pathArr.length == 4;
        }
        return false;
    }

    /**
     * Applies the main-site rule to a path that has already been split on "/".
     *
     * @param pathArr the split path; index 0 is the empty string before the leading "/"
     * @return {@code true} when the path has more than 5 parts, or exactly 5 parts with a "-" separated second segment
     */
    private static boolean isMainSiteDetailPath(String[] pathArr) {
        if (pathArr.length < 5) {
            return false;
        }
        if (pathArr.length > 5) {
            return true;
        }
        String titleStr = pathArr[2];
        return titleStr.split("-").length > 1;
    }

    /**
     * Locale-independent, case-insensitive {@link String#startsWith(String)}.
     *
     * @param text the string to inspect
     * @param prefix the prefix to look for
     * @return {@code true} if {@code text} begins with {@code prefix}, ignoring case
     */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * @return the image handler injected for this crawler
     */
    @Override
    public ContentImageHandler getContentImageHandler() {
        return nationNewsImageHandler;
    }

    /**
     * Builds the list of seed pages to crawl, each paired with the {@link NewsType} assigned to it.
     * <p>
     * NOTE(review): several sections are mapped to news types that do not obviously match their name (for example
     * {@code news/africa} to ASKANDANSWER and {@code counties} to CAR) — the mapping is kept as it was; confirm it is
     * intentional.
     *
     * @return a new, mutable list of seed pages in a fixed order
     */
    @Override
    public List<SourceType> getSourceTypeList() {
        List<SourceType> sourceTypeList = new ArrayList<SourceType>();

        // Main site: news sections and the home page.
        addSource(sourceTypeList, MAIN_BASE_URL + "news/1056-1056-u6geog/index.html", NewsType.NEWS);
        addSource(sourceTypeList, MAIN_BASE_URL, NewsType.HOT);
        addSource(sourceTypeList, MAIN_BASE_URL + "news/politics/1064-1064-4f88toz/index.html", NewsType.POLITICS);
        addSource(sourceTypeList, MAIN_BASE_URL + "news/africa/1066-1066-oo1nedz/index.html", NewsType.ASKANDANSWER);
        addSource(sourceTypeList, MAIN_BASE_URL + "news/world/1068-1068-y0kl4cz/index.html", NewsType.BLOG);

        // Main site: business.
        addSource(sourceTypeList, MAIN_BASE_URL + "business/996-996-x0uutpz/index.html", NewsType.BUSINESS);
        addSource(sourceTypeList, MAIN_BASE_URL + "business/seedsofgold/2301238-2301238-e2g5gz/index.html",
                  NewsType.BUSINESS);

        // Main site: counties.
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/1107872-1107872-44rd16z/index.html", NewsType.CAR);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/coast/3347800-3347800-ey6rn8z/index.html",
                  NewsType.EDUCATION);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/eastern-region/3347810-3347810-2501vq/index.html",
                  NewsType.ENTERTAINMENT);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/mtkenya/3317830-3317830-7p8g5w/index.html",
                  NewsType.FINANCE);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/northern-region/3347734-3347734-155vfigz/index.html",
                  NewsType.EDUCATION);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/north-rift/3347778-3347778-aga2m3z/index.html",
                  NewsType.EDUCATION);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/south-rift/3347782-3347782-rtpd5y/index.html",
                  NewsType.EDUCATION);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/nyanza-kisii-region/3347796-3347796-unywdg/index.html",
                  NewsType.EDUCATION);
        addSource(sourceTypeList, MAIN_BASE_URL + "counties/western-region/3347786-3347786-nvg1d5/index.html",
                  NewsType.EDUCATION);

        // Main site: sports.
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/1090-1090-iqcgwe/index.html", NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/football/1102-1102-5p3gunz/index.html", NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/athletics/1100-1100-g06rtez/index.html", NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/rugby/1106-1106-g0i5l0z/index.html", NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/golf/1104-1104-hjqyif/index.html", NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/othersports/1951306-1951306-fy284lz/index.html",
                  NewsType.SPORT);
        addSource(sourceTypeList, MAIN_BASE_URL + "sports/talkup/441392-441392-wkoe7h/index.html", NewsType.SPORT);

        // Main site: opinion and editorial.
        addSource(sourceTypeList, MAIN_BASE_URL + "oped/1192-1192-110kbth/index.html", NewsType.BLOG);
        addSource(sourceTypeList, MAIN_BASE_URL + "oped/blogs/620-620-obkqwq/index.html", NewsType.BLOG);
        addSource(sourceTypeList, MAIN_BASE_URL + "oped/opinion/440808-440808-h8w00fz/index.html", NewsType.BLOG);
        addSource(sourceTypeList, MAIN_BASE_URL + "oped/editorial/440804-440804-id1ldp/index.html", NewsType.BLOG);
        addSource(sourceTypeList, MAIN_BASE_URL + "oped/cartoon/454986-454986-14tce6c/index.html", NewsType.BLOG);

        // Main site: lifestyle.
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/1190-1190-5p56avz/index.html", NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/artculture/1954194-1954194-7hp6o1/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/family/1954198-1954198-jeojfi/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/health/1954202-1954202-rynt7v/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/showbiz/1950810-1950810-25qq0az/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/travel/1950822-1950822-ry3yf4/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/women/1950830-1950830-b0uq99z/index.html",
                  NewsType.LIFESTYLE);
        addSource(sourceTypeList, MAIN_BASE_URL + "lifestyle/magazines/2439396-2439396-u46yvl/index.html",
                  NewsType.LIFESTYLE);

        // Main site: video and photo galleries.
        addSource(sourceTypeList, MAIN_BASE_URL + "video/1951480-1951480-1072vroz/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, MAIN_BASE_URL + "photo/1951220-1951220-df7whxz/index.html", NewsType.IMAGE);

        // NTV site: every section is registered as video.
        addSource(sourceTypeList, NTV_BASE_URL + "2720202-2720202-13isx0f/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "news/2720124-2720124-mohb4/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "features/2720352-2720352-12fgdj4z/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "business/2720224-2720224-cn15j9z/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "sports/2720356-2720356-10biibpz/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "vr/4537750-4537750-dfuggv/index.html", NewsType.VIDEO);
        addSource(sourceTypeList, NTV_BASE_URL + "live/2740874-2740874-4yhxxuz/index.html", NewsType.VIDEO);

        return sourceTypeList;
    }

    /**
     * Appends one seed page to {@code target}.
     *
     * @param target the list being built
     * @param url the absolute URL of the seed page
     * @param newsType the news type assigned to that page
     */
    private static void addSource(List<SourceType> target, String url, NewsType newsType) {
        target.add(SourceType.builder().url(url).newsType(newsType).build());
    }

    /**
     * @return a new set containing only the name of {@link CountryCode#KE}
     */
    @Override
    public Set<String> getCountryCodeSet() {
        Set<String> countryCodeSet = new HashSet<String>();
        countryCodeSet.add(CountryCode.KE.name());
        return countryCodeSet;
    }

}
